# -*- coding: utf-8 -*-

# Scrapy settings for csdn project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

# Name of the bot implemented by this Scrapy project.
BOT_NAME = "csdn"

# Spiders live in one package; the same package is the target for newly
# generated spiders, so it is spelled out once and reused.
NEWSPIDER_MODULE = "csdn.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]


# User-Agent string for the crawler: a desktop Chrome-on-Windows signature.
# NOTE(review): DEFAULT_REQUEST_HEADERS in this file also carries a
# 'User-agent' entry with a different value - confirm which one is meant to
# take effect.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/71.0.3578.98 Safari/537.36"
)


# robots.txt rules are not obeyed: the check is switched off.
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers.
#
# The Cookie value is one long header string. It is assembled from adjacent
# string literals inside parentheses (one cookie pair per literal) because a
# single-quoted literal cannot span physical lines; the pieces are joined with
# nothing in between, so the resulting header contains no line breaks.
#
# NOTE(review): the cookie embeds a logged-in session (UserName / UserToken).
# Consider loading it from the environment or an untracked local settings
# file instead of committing it.
# NOTE(review): the 'User-agent' entry differs from the module-level
# USER_AGENT setting - confirm which value is intended.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
    'Cookie': (
        'uuid_tt_dd=10_37288045120-1583464483482-836246; '
        'dc_session_id=10_1583464483482.900714; '
        'Hm_lvt_08e81a4274b754c777bb34d05a011db1=1583731645; '
        'Hm_ct_08e81a4274b754c777bb34d05a011db1=6525*1*10_37288045120-1583464483482-836246; '
        '__yadk_uid=VTLC0NQUfJ71NBaFQNkEGLIT9kViPktt; '
        'UserName=fanfzj; '
        'UserInfo=5a8a013c9ba64640bc96e9e731142e66; '
        'UserToken=5a8a013c9ba64640bc96e9e731142e66; '
        'UserNick=%E5%AD%90%E6%B2%90%E8%80%81%E5%8F%B8; '
        'AU=45D; '
        'UN=fanfzj; '
        'BT=1585300482640; '
        'p_uid=U100000; '
        'Hm_lvt_3b20a7d9064e546c823583b417d6b4cb=1586236347,1586412395; '
        'Hm_ct_3b20a7d9064e546c823583b417d6b4cb=5744*1*fanfzj!6525*1*10_37288045120-1583464483482-836246; '
        'Hm_lvt_68822ecd314ca264253e255a3262d149=1592717132; '
        'Hm_up_68822ecd314ca264253e255a3262d149=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22fanfzj%22%2C%22scope%22%3A1%7D%7D; '
        'Hm_ct_68822ecd314ca264253e255a3262d149=5744*1*fanfzj!6525*1*10_37288045120-1583464483482-836246; '
        'Hm_lvt_a638f19c4d8f6e9569e94a045cecf534=1592717162; '
        'Hm_up_a638f19c4d8f6e9569e94a045cecf534=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22fanfzj%22%2C%22scope%22%3A1%7D%7D; '
        'Hm_ct_a638f19c4d8f6e9569e94a045cecf534=5744*1*fanfzj!6525*1*10_37288045120-1583464483482-836246; '
        'Hm_lvt_62052699443da77047734994abbaed1b=1596434175; '
        'Hm_up_62052699443da77047734994abbaed1b=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22fanfzj%22%2C%22scope%22%3A1%7D%7D; '
        'Hm_ct_62052699443da77047734994abbaed1b=5744*1*fanfzj!6525*1*10_37288045120-1583464483482-836246; '
        'Hm_lvt_e5ef47b9f471504959267fd614d579cd=1595491477,1597030374,1597030621; '
        'Hm_up_e5ef47b9f471504959267fd614d579cd=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22fanfzj%22%2C%22scope%22%3A1%7D%7D; '
        'Hm_ct_e5ef47b9f471504959267fd614d579cd=5744*1*fanfzj!6525*1*10_37288045120-1583464483482-836246; '
        'Hm_lvt_5c2049f0420b71ee20ff0f2896670ca2=1597655043; '
        'Hm_up_5c2049f0420b71ee20ff0f2896670ca2=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22fanfzj%22%2C%22scope%22%3A1%7D%7D; '
        'Hm_ct_5c2049f0420b71ee20ff0f2896670ca2=5744*1*fanfzj!6525*1*10_37288045120-1583464483482-836246; '
        'searchHistoryArray=%255B%2522%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590%25E9%259D%25A2%25E8%25AF%2595%25E9%25A2%2598%2522%255D; '
        'Hm_lvt_4a20bfe8e339184241f52b1b2c53e116=1598499159; '
        'Hm_up_4a20bfe8e339184241f52b1b2c53e116=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22uid_%22%3A%7B%22value%22%3A%22fanfzj%22%2C%22scope%22%3A1%7D%7D; '
        'Hm_ct_4a20bfe8e339184241f52b1b2c53e116=5744*1*fanfzj!6525*1*10_37288045120-1583464483482-836246; '
        'announcement=%257B%2522isLogin%2522%253Atrue%252C%2522announcementUrl%2522%253A%2522https%253A%252F%252Flive.csdn.net%252Froom%252Fyzkskaka%252F5n5O4pRs%253Futm_source%253D1598583200%2522%252C%2522announcementCount%2522%253A0%257D; '
        'dc_sid=9d31f067eae843ba4457c74f4a31df65; '
        'TY_SESSION_ID=561b9167-3775-435e-a34a-0dbb1bba966d; '
        'c_utm_source=blogxgwz4; '
        'log_Id_pv=34; '
        'log_Id_click=29; '
        'log_Id_view=32; '
        'c_utm_medium=distribute.pc_feed.none-task-blog-vip_agree_hot-1.nonecase; '
        'c_adb=1; '
        'c_first_ref=www.baidu.com; '
        'c_first_page=https%3A//blog.csdn.net/l1336037686/article/details/78536694; '
        'c_segment=6; '
        'c_page_id=default; '
        'dc_tos=qgf93b; '
        'c_ref=https%3A//www.baidu.com/link; '
        'Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1599645730,1599705976,1599705994,1599706056; '
        'Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1599706056; '
        'Hm_up_6bcd52f51e9b3dce32bec4a3997715ac=%7B%22uid_%22%3A%7B%22value%22%3A%22fanfzj%22%2C%22scope%22%3A1%7D%2C%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%7D; '
        'Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_37288045120-1583464483482-836246!5744*1*fanfzj'
    ),
    'User-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/5.0 Chrome/47.0.2526.73 Safari/537.36',
}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
SPIDER_MIDDLEWARES = {
    # Template entry, currently disabled:
    # 'csdn.middlewares.CsdnSpiderMiddleware': 543,
    #
    # NOTE(review): this same class path is also registered in
    # DOWNLOADER_MIDDLEWARES (order 400) - confirm it is really meant to run
    # as a spider middleware as well.
    "csdn.middlewares.UserAgentmiddleware": 401,
}

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    # Template entry, currently disabled:
    # 'csdn.middlewares.CsdnDownloaderMiddleware': 543,
    #
    # Project middlewares and their order values.
    "csdn.middlewares.UserAgentmiddleware": 400,
    "csdn.middlewares.CookieMiddleware": 700,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    # scrapy_redis pipeline is registered with the lower order value (300),
    # the project's own pipeline with the higher one (400).
    "scrapy_redis.pipelines.RedisPipeline": 300,
    "csdn.pipelines.CsdnPipeline": 400,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Redis configuration
# NOTE(review): the password is committed in plain text - consider reading it
# from the environment or an untracked local settings file.
REDIS_HOST = "redis.fzj.com"
REDIS_PORT = 9211
REDIS_PARAMS = dict(
    password="FanTan879425",  # password
    db=9,  # Redis database to select
)
# Connection URL assembled from the values above, so host, port and password
# are each written down once. The URL itself carries no database path.
REDIS_URL = "redis://:{password}@{host}:{port}".format(
    password=REDIS_PARAMS["password"],
    host=REDIS_HOST,
    port=REDIS_PORT,
)

# Database (MySQL) configuration
# NOTE(review): the host name is identical to REDIS_HOST ('redis.fzj.com') -
# confirm MySQL really runs on that host and this is not a copy/paste slip.
# NOTE(review): the password is committed in plain text - consider reading it
# from the environment or an untracked local settings file.
MYSQL_HOST = "redis.fzj.com"
MYSQL_PORT = 3306
MYSQL_PARAMS = dict(
    user="fzj",
    password="FanYan879688",
    db="csdn",
)

# Have the scheduler keep the request queue in Redis.
SCHEDULER = 'scrapy_redis.scheduler.Scheduler'

# Request scheduling strategy; the priority queue is the default.
SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.PriorityQueue"

# Persist the request queue in Redis so a crawl can be paused or restarted.
SCHEDULER_PERSIST = True

# Make sure every spider instance de-duplicates requests through Redis.
DUPEFILTER_CLASS = 'scrapy_redis.dupefilter.RFPDupeFilter'
